NETWORK

In [1]:
%matplotlib inline
import networkx as nx
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

1. Data

In [75]:
raw_data = pd.read_csv('Full-2020_05_10-13_14_09-X_raw_disease_50_target_100_7217_by_101.csv')
raw_data.head()
Out[75]:
Unnamed: 0 ENSG00000025039 ENSG00000048649 ENSG00000079459 ENSG00000102096 ENSG00000102580 ENSG00000103522 ENSG00000117151 ENSG00000117399 ENSG00000122786 ... ENSG00000279454 ENSG00000279774 ENSG00000279861 ENSG00000283202 ENSG00000284416 ENSG00000287234 ENSG00000287291 ENSG00000287329 ENSG00000287565 COVID
0 EFO_0000209 0.0 0.0 0.000 0.0 0.0 0.0 0.000000 0.000 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.001827
1 EFO_0000537 0.0 0.0 0.014 0.0 0.0 0.0 0.022289 0.006 0.064156 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.001435
2 EFO_0000760 0.0 0.0 0.000 0.0 0.0 0.0 0.000000 0.000 0.022000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.002511
3 EFO_0005109 0.0 0.0 0.000 0.0 0.0 0.0 0.000000 0.000 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.003721
4 EFO_0005194 0.0 0.0 0.000 0.0 0.0 0.0 0.000000 0.000 0.000000 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.000201

5 rows × 102 columns

In [3]:
disease_list = list(raw_data['Unnamed: 0'])
gene_list = list(raw_data.columns)[1:]
In [5]:
# processing names and codes
df_diseases_names = pd.read_csv('disease_list.csv')
d_codes = list(df_diseases_names.efo_id)
d_names = list(df_diseases_names.disease_full_name)
diseases_names = {d_codes[i]:d_names[i] for i in range(len(d_codes)) }
diseases_names['COVID'] = 'COVID'

df_genes_names = pd.read_csv('target_list.csv')
g_codes = list(df_genes_names.ensembl_id)
g_names = list(df_genes_names.hgnc_approved_symbol)
genes_names = {g_codes[i]:g_names[i] for i in range(len(g_codes)) }
genes_names['COVID'] = 'COVID'
In [6]:
# Data for Graph

data = []
for i in range(len(raw_data)):
    disease = raw_data.iloc[i]
    disease_name = diseases_names[disease['Unnamed: 0']]
    edges = [(disease_name, genes_names[gene], disease[gene]) for gene in gene_list if disease[gene] != 0]
    if len(edges) > 0:
        data.extend(edges)
In [7]:
# Graph Creation
Graph = nx.Graph()
diseases = np.unique([data[i][0] for i in range(len(data))])
genes = np.unique([data[i][1] for i in range(len(data))])
Graph.add_nodes_from(diseases, label = 'Disease')
Graph.add_nodes_from(genes, label = 'Gene')
Graph.add_weighted_edges_from(data)
In [8]:
nodelist = {}
nodelist['Diseases'] = diseases
nodelist['Genes'] = genes
In [78]:
# score each disease was associated to covid
score = list(raw_data.COVID)
covid_score = {diseases_names[disease_list[i]]: score[i] for i in range(len(score))}
In [167]:
covid_score_list = [(d,covid_score[d]) for d in covid_score]
covid_score_list.sort(key=custom_sort, reverse = True)

2. Data analysis

In [9]:
print('Number of nodes: {}'.format(len(Graph.nodes)))
print('Number of edges: {}'.format(len(Graph.edges)))
Number of nodes: 7265
Number of edges: 20541
In [10]:
# infos dict initialization
Graph_infos = {}
In [12]:
# Degree (number of edges at each node) for all nodes in order to have the main stats (max,min,meand,median,quantile) and plot

Graph_infos['Degrees'] = dict(nx.degree(Graph))

# only values
degrees_val = [Graph_infos['Degrees'][node] for node in Graph_infos['Degrees']]

#stats
degree_min = np.min(degrees_val)
print('Min degree connection: {}'.format(degree_min))
print('')

degree_max = np.max(degrees_val)
print('Max degree connection: {}'.format(degree_max))
print('')

degree_median = np.median(degrees_val)
print('Median degree connection: {}'.format(degree_median))
print('')

degree_mean = np.mean(degrees_val)
print('Mean degree connection: {}'.format(degree_mean))
print('')

degree_u_quant = np.quantile(degrees_val,0.75)
print('75% quantile degree connection: {}'.format(degree_u_quant))
print('')

degree_l_quant = np.quantile(degrees_val,0.25)
print('25% quantile degree connection: {}'.format(degree_l_quant))
print('')

# remove covid for histogram
degrees_val.remove(np.max(degrees_val))

# Degree histogram
plt.figure(figsize=(12,7)) 
sns.set(style = "whitegrid") 
 
sns.distplot(degrees_val,axlabel="Degree",kde=False)


plt.legend()
plt.yscale('log')
plt.title("Edges Connection Histogram, lin - log", fontsize = 20) # for histogram title
plt.savefig('Degrees_full.png')
plt.legend()
No handles with labels found to put in legend.
Min degree connection: 1

Max degree connection: 7217

Median degree connection: 1.0

Mean degree connection: 5.654783207157605

75% quantile degree connection: 3.0

25% quantile degree connection: 1.0

No handles with labels found to put in legend.
Out[12]:
<matplotlib.legend.Legend at 0x233ba0d0108>
In [15]:
# Degree histogram Diseases
degrees_val_disease = [Graph_infos['Degrees'][d] for d in diseases]


#stats
degree_min = np.min(degrees_val_disease)
print('Min degree connection: {}'.format(degree_min))
print('')

degree_max = np.max(degrees_val_disease)
print('Max degree connection: {}'.format(degree_max))
print('')

degree_median = np.median(degrees_val_disease)
print('Median degree connection: {}'.format(degree_median))
print('')

degree_mean = np.mean(degrees_val_disease)
print('Mean degree connection: {}'.format(degree_mean))
print('')

degree_u_quant = np.quantile(degrees_val_disease,0.75)
print('75% quantile degree connection: {}'.format(degree_u_quant))
print('')

degree_l_quant = np.quantile(degrees_val_disease,0.25)
print('25% quantile degree connection: {}'.format(degree_l_quant))
print('')

# remove covid for histogram
degrees_val_disease.remove(np.max(degrees_val_disease))

plt.figure(figsize=(16,9)) 
sns.set(style = "whitegrid")
 
sns.distplot(degrees_val_disease,axlabel="Degree",kde=False)
plt.axvline(degree_u_quant, color = 'red', linestyle = '--', label = '75% quantile: {}'.format(degree_u_quant))
plt.axvline(degree_l_quant, color = 'blue', linestyle = '--', label = '25% quantile: {}'.format(degree_l_quant))
plt.axvline(degree_median, color = 'green', linestyle = '--', label = 'Median: {}'.format(degree_median))
plt.axvline(degree_mean, color = 'yellow', linestyle = '--', label = 'Mean: {}'.format(round(degree_mean,2)))
plt.legend()

plt.yscale('log')
plt.title("Edges Connection Histogram Diseases, lin - log", fontsize = 20) # for histogram title
plt.savefig('Degrees_diseases.png')
plt.show()
Min degree connection: 1

Max degree connection: 32

Median degree connection: 1.0

Mean degree connection: 2.846196480532077

75% quantile degree connection: 3.0

25% quantile degree connection: 1.0

In [16]:
# Degree histogram Genes
degrees_val_genes = [Graph_infos['Degrees'][g] for g in genes]
print(max(degrees_val_genes))

#stats
degree_min = np.min(degrees_val_genes)
print('Min degree connection: {}'.format(degree_min))
print('')

degree_max = np.max(degrees_val_genes)
print('Max degree connection: {}'.format(degree_max))
print('')

degree_median = np.median(degrees_val_genes)
print('Median degree connection: {}'.format(degree_median))
print('')

degree_mean = np.mean(degrees_val_genes)
print('Mean degree connection: {}'.format(degree_mean))
print('')

degree_u_quant = np.quantile(degrees_val_genes,0.75)
print('75% quantile degree connection: {}'.format(degree_u_quant))
print('')

degree_l_quant = np.quantile(degrees_val_genes,0.25)
print('25% quantile degree connection: {}'.format(degree_l_quant))
print('')

# remove covid for histogram
degrees_val_genes.remove(np.max(degrees_val_genes))

plt.figure(figsize=(16,9)) 
sns.set(style = "whitegrid") 
 
sns.distplot(degrees_val_genes,axlabel="Degree",kde=False)
plt.axvline(degree_u_quant, color = 'red', linestyle = '--', label = '75% quantile: {}'.format(degree_u_quant))
plt.axvline(degree_l_quant, color = 'blue', linestyle = '--', label = '25% quantile: {}'.format(degree_l_quant))
plt.axvline(degree_median, color = 'green', linestyle = '--', label = 'Median: {}'.format(degree_median))
plt.axvline(degree_mean, color = 'yellow', linestyle = '--', label = 'Mean: {}'.format(round(degree_mean,2)))
plt.legend()

plt.yscale('log')
plt.title("Edges Connection Histogram Genes, yscale logarithmic", fontsize = 20) # for histogram title
plt.show()
7217
Min degree connection: 1

Max degree connection: 7217

Median degree connection: 123.5

Mean degree connection: 427.9375

75% quantile degree connection: 453.0

25% quantile degree connection: 3.75

Centrailities

In [18]:
Graph_centrality = {}

Commented for computational time, dataframe loaded below

Weighted centralities

Graph_centrality = {}

deg_cen = dict(Graph.degree(Graph, weight = 'weight')) Graph_centrality['Degree Centrality'] = deg_cen

clo_cen = nx.closeness_centrality(Graph) Graph_centrality['Closeness Centrality'] = clo_cen

bw_cen = nx.betweenness_centrality(Graph, weight = 'weight') Graph_centrality['Betweeness Centrality'] = bw_cen

pr_cen = nx.pagerank(Graph, weight = 'weight') Graph_centrality['Pagerank Centrality'] = pr_cen

eig_cen = nx.eigenvector_centrality(Graph, weight = 'weight')

Graph_centrality['Eigenvector Centrality'] = eig_cen

df_cen = pd.DataFrame.from_dict(Graph_centrality)

pd.DataFrame.to_csv(df_cen,'Centralities_weighted.csv')

In [19]:
centralities_w = pd.read_csv('Centralities_weighted.csv')
centralities_w_dict = centralities_w.to_dict('series')
In [20]:
centralities_w.head()
Out[20]:
Column1 Degree Centrality Closeness Centrality Betweeness Centrality Pagerank Centrality
0 ENSG00000169432 668.462446 0.371655 2.753300e-04 0.078392
1 ENSG00000103522 314.903177 0.367370 0.000000e+00 0.039254
2 ENSG00000187735 207.955171 0.350969 6.820000e-07 0.021354
3 ENSG00000165646 206.871757 0.369237 1.100601e-03 0.028321
4 ENSG00000213398 172.568817 0.374723 1.112031e-03 0.028632
In [21]:
centrality_w_names = list(centralities_w.columns)[1:]
disease_codes = list(centralities_w['Column1'])
In [22]:
Graph_centrality_w = {}
for col in centrality_w_names:
    Graph_centrality_w[col] = {disease_codes[i]: centralities_w_dict[col][i] for i in range(len(disease_codes))}
In [23]:
# Set appropriate names to genes since not preent in dataframe loaded
for cen in Graph_centrality_w:
    nodes = list(Graph_centrality_w[cen].keys())
    for node in nodes:
        try: 
            label = Graph.nodes[node]['label']
                
        except KeyError:
            print('error')
            gene_name = genes_names[node]
            
            Graph_centrality_w[cen][gene_name] = Graph_centrality_w[cen][node]
            print(Graph_centrality_w[cen][gene_name])
            del Graph_centrality_w[cen][node]
            
error
668.4624461
error
314.9031773
error
207.9551714
error
206.8717573
error
172.56881669999999
error
143.78162939999999
error
133.6433112
error
97.05999763
error
78.43858238
error
71.25107951
error
64.75367234
error
52.28954195
error
49.00863021
error
45.63681324
error
35.68519156
error
30.07005342
error
29.44025459
error
28.13992104
error
27.34125399
error
25.94482457
error
24.8436543
error
16.94291568
error
6.895714900000001
error
6.5616201489999995
error
4.930807589
error
3.806871475
error
3.325584778
error
2.835014316
error
2.62250298
error
1.9984705630000001
error
1.793884424
error
0.7116176040000001
error
0.36312
error
0.23848219
error
0.212946873
error
0.18454015699999998
error
0.125644758
error
0.111815253
error
0.11061411300000001
error
0.1089
error
0.106106202
error
0.08514
error
0.044843281
error
0.029626525
error
0.016165305
error
0.007527913000000001
error
0.005849236
error
0.371655155
error
0.367369645
error
0.350968739
error
0.369237025
error
0.374722724
error
0.36112354
error
0.371503094
error
0.349617365
error
0.34790938299999996
error
0.351138396
error
0.34366277100000003
error
0.341594169
error
0.35859209200000003
error
0.34526355799999997
error
0.34269000299999997
error
0.339740891
error
0.337907615
error
0.34275468299999995
error
0.355677423
error
0.344935657
error
0.3351945
error
0.345789499
error
0.336810868
error
0.340952828
error
0.335845393
error
0.33687334799999996
error
0.335938584
error
0.336094017
error
0.336904596
error
0.337217399
error
0.338885001
error
0.334269017
error
0.33334863
error
0.333165161
error
0.333685516
error
0.33371617600000003
error
0.333318038
error
0.333287451
error
0.333409832
error
0.33298189300000003
error
0.333471055
error
0.33310404899999996
error
0.33334863
error
0.332951368
error
0.332859827
error
0.333195725
error
0.33292084899999996
error
0.00027533
error
0.0
error
6.82e-07
error
0.001100601
error
0.001112031
error
0.022734868
error
0.000825953
error
2.2699999999999998e-07
error
0.0
error
0.0011003739999999999
error
0.0
error
0.00082565
error
0.04311163
error
0.00275156
error
1.36e-06
error
0.0
error
0.0
error
0.0027516359999999996
error
0.00055322
error
0.00027514099999999996
error
0.0
error
0.000274496
error
0.0
error
0.00027461
error
0.0
error
0.0
error
0.0
error
0.0
error
0.000274496
error
4.1700000000000004e-07
error
0.000275217
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.0
error
0.078392018
error
0.039253624
error
0.021353887000000002
error
0.028321434
error
0.028631862999999997
error
0.019388552
error
0.020472893
error
0.013094138
error
0.00961572
error
0.008217829
error
0.008718205
error
0.00524297
error
0.007707886
error
0.006045664
error
0.004115024
error
0.0029986809999999996
error
0.0031301659999999998
error
0.003951924
error
0.005579665
error
0.003865081
error
0.00263957
error
0.002394153
error
0.0009570830000000001
error
0.001216189
error
0.000689317
error
0.0006624760000000001
error
0.00046696300000000004
error
0.000463317
error
0.00037920300000000005
error
0.000295795
error
0.000363802
error
0.00015860899999999999
error
5.99e-05
error
4.6e-05
error
5.12e-05
error
5.19e-05
error
3.41e-05
error
3.17e-05
error
3.13e-05
error
3.289999999999999e-05
error
3.37e-05
error
2.8699999999999996e-05
error
2.7399999999999995e-05
error
6.78e-05
error
2.49e-05
error
2.14e-05
error
2.3199999999999998e-05
In [24]:
# in order to sort dicitonaries
import operator
In [25]:
#list to dict function
def list_to_dict(a):
    d = {a[j][0]:a[j][1] for j in range(len(a)) }

    return d
In [98]:
def custom_sort(t):
    return t[1]
In [137]:
# print top 10 nodes for given centrality measure
def top_print(name,centrality,n, nodelist, label):
    
    # sort the centrality to have the best above
    centrality_label = [(node, centrality[node]) for node in nodelist]
    centrality_label.sort(key=custom_sort, reverse = True)
    
    nodes = [node[0] for node in centrality_label][:n]
    
    print('-------------------------------------------')
    print("{}: WEIGHTED CENTRALITY MEASURE BY {}".format(label.upper(), name.upper()))
    print(' ')
    for k in range(0,n):
    # depends for decimals or not
        if centrality_label[k][1] > 1:
            print("{}. {}: {:.0F}".format(k+1,centrality_label[k][0].upper(),centrality_label[k][1]))
        else:
            print("{}. {}: {:.5F}".format(k+1,centrality_label[k][0].upper(),centrality_label[k][1]))
        k = k+1
    return nodes
In [138]:
#execute function for diseases
results = {}
for k in Graph_centrality_w:
    centrality = Graph_centrality_w[k]
    name = k
    results[k] = top_print(name,centrality,10,list(diseases), 'Disease' )
-------------------------------------------
DISEASE: WEIGHTED CENTRALITY MEASURE BY DEGREE CENTRALITY
 
1. MEASUREMENT: 14
2. GENETIC DISORDER: 13
3. GENETIC, FAMILIAL OR CONGENITAL DISEASE: 13
4. CELL PROLIFERATION DISORDER: 12
5. NEOPLASTIC DISEASE OR SYNDROME: 12
6. NEOPLASM: 12
7. CANCER: 12
8. RESPIRATORY OR THORACIC DISEASE: 11
9. THORACIC DISEASE: 11
10. EPITHELIAL NEOPLASM: 10
-------------------------------------------
DISEASE: WEIGHTED CENTRALITY MEASURE BY CLOSENESS CENTRALITY
 
1. MEASUREMENT: 0.50055
2. CELL PROLIFERATION DISORDER: 0.50041
3. NEOPLASM: 0.50041
4. NEOPLASTIC DISEASE OR SYNDROME: 0.50041
5. NERVOUS SYSTEM DISEASE: 0.50041
6. BONE DISEASE: 0.50034
7. BRAIN DISEASE: 0.50034
8. CENTRAL NERVOUS SYSTEM DISEASE: 0.50034
9. CONNECTIVE TISSUE DISEASE: 0.50034
10. HEMATOLOGICAL MEASUREMENT: 0.50034
-------------------------------------------
DISEASE: WEIGHTED CENTRALITY MEASURE BY BETWEENESS CENTRALITY
 
1. UTERINE DISEASE: 0.04301
2. PARALYSIS: 0.02283
3. SHORT STATURE DUE TO GHSR DEFICIENCY: 0.00302
4. COMBINED T AND B CELL IMMUNODEFICIENCY: 0.00151
5. IMMUNODEFICIENCY DISEASE: 0.00151
6. VISION DISORDER: 0.00139
7. ABNORMALITY OF BRAIN MORPHOLOGY: 0.00137
8. VISCERAL LEISHMANIASIS: 0.00137
9. CYTOMEGALOVIRUS INFECTION: 0.00110
10. LYMPHANGIOLEIOMYOMATOSIS: 0.00110
-------------------------------------------
DISEASE: WEIGHTED CENTRALITY MEASURE BY PAGERANK CENTRALITY
 
1. MEASUREMENT: 0.00157
2. GENETIC DISORDER: 0.00147
3. GENETIC, FAMILIAL OR CONGENITAL DISEASE: 0.00147
4. CELL PROLIFERATION DISORDER: 0.00133
5. NEOPLASTIC DISEASE OR SYNDROME: 0.00133
6. NEOPLASM: 0.00132
7. CANCER: 0.00126
8. RESPIRATORY OR THORACIC DISEASE: 0.00116
9. THORACIC DISEASE: 0.00115
10. NERVOUS SYSTEM DISEASE: 0.00112
In [142]:
#execute function for diseases
results = {}
for k in Graph_centrality_w:
    centrality = Graph_centrality_w[k]
    name = k
    results[k] = top_print(name,centrality,10,list(genes), 'Genes' )
-------------------------------------------
GENES: WEIGHTED CENTRALITY MEASURE BY DEGREE CENTRALITY
 
1. SCN9A: 668
2. IL21R: 315
3. TCEA1: 208
4. SLC18A2: 207
5. LCAT: 173
6. PDGFD: 144
7. CALD1: 134
8. DNAJC3: 97
9. TRIM13: 78
10. CDC20: 71
-------------------------------------------
GENES: WEIGHTED CENTRALITY MEASURE BY CLOSENESS CENTRALITY
 
1. COVID: 0.99357
2. LCAT: 0.37472
3. SCN9A: 0.37166
4. CALD1: 0.37150
5. SLC18A2: 0.36924
6. IL21R: 0.36737
7. PDGFD: 0.36112
8. FDFT1: 0.35859
9. DHRS11: 0.35568
10. CDC20: 0.35114
-------------------------------------------
GENES: WEIGHTED CENTRALITY MEASURE BY BETWEENESS CENTRALITY
 
1. COVID: 0.99939
2. FDFT1: 0.04311
3. PDGFD: 0.02273
4. PKIG: 0.00275
5. FKBP10: 0.00275
6. LCAT: 0.00111
7. SLC18A2: 0.00110
8. CDC20: 0.00110
9. CALD1: 0.00083
10. RRAGD: 0.00083
-------------------------------------------
GENES: WEIGHTED CENTRALITY MEASURE BY PAGERANK CENTRALITY
 
1. COVID: 0.13218
2. SCN9A: 0.07839
3. IL21R: 0.03925
4. LCAT: 0.02863
5. SLC18A2: 0.02832
6. TCEA1: 0.02135
7. CALD1: 0.02047
8. PDGFD: 0.01939
9. DNAJC3: 0.01309
10. TRIM13: 0.00962
In [150]:
def overall_ranking(G, G_centrality, n_top, G_infos, nodelist):
    ''' G: Graph
        G_centrality: centralities dict 
        n_top (int): number of nodes to be printed
        G_infos (dict)'''
    
    centralities = list(G_centrality.keys())
    
    # dict in order to have the ranking {best node 1: 1, best node 2: 2} for each centrality
    centrality_ranked = {}
    
    # for each key in the overall_rank dict, have the ranking they have in each centrality
    overall_rank = {n: [] for n in nodelist}

    for cen in centralities:
        centrality_label = {n: G_centrality[cen][n] for n in nodelist}
        ranking = sorted(centrality_label.items(),key = operator.itemgetter(1),reverse = True)
        
        centrality_ranked[cen] = {ranking[n][0]: n+1 for n in range(0,len(ranking))}
        
        for node in nodelist:
            overall_rank[node].append(centrality_ranked[cen][node])
        
    # sum the points each nodes earned in the ranking in the centralities
    total = {node: np.sum(overall_rank[node]) for node in nodelist}
    
    # sort in order to have the n_tops nodes in increasing order
    total_ranking = sorted(total.items(),key = operator.itemgetter(1),reverse = False)
    
    top_nodes = [total_ranking[i][0] for i in range(n_top)]
    results = {}
    
    print('OVERALL RANKING')

        
    for i in range(len(top_nodes)):
        node = top_nodes[i]
        results[node] = {}
        print('')
        print('------------------')
        print('Node: {}'.format(node.upper()))
        print('')
        print('No: {} in centrality overall ranking'.format(i+1))
        results[node]['Overall Ranking'] = i+1
        print('')
        print('Specificity:')
        print('')
        print('Number of connections: {}'.format(G_infos['Degrees'][node]))
        results[node]['Number of connections'] = G_infos['Degrees'][node]
        
        print('')
        for j in range(len(centralities)):
            print('No: {} in {} ranking'.format(overall_rank[node][j], str(centralities[j])))
            results[node][str(centralities[j])] = overall_rank[node][j]
        
        
    return centrality_ranked, total_ranking
In [151]:
centrality_ranked_d,total_ranking_d = overall_ranking(Graph, Graph_centrality_w,10, Graph_infos,diseases)
OVERALL RANKING

------------------
Node: BONE DISEASE

No: 1 in centrality overall ranking

Specificity:

Number of connections: 29

No: 40 in Degree Centrality ranking
No: 6 in Closeness Centrality ranking
No: 49 in Betweeness Centrality ranking
No: 37 in Pagerank Centrality ranking

------------------
Node: PSYCHIATRIC DISORDER

No: 2 in centrality overall ranking

Specificity:

Number of connections: 28

No: 62 in Degree Centrality ranking
No: 18 in Closeness Centrality ranking
No: 17 in Betweeness Centrality ranking
No: 60 in Pagerank Centrality ranking

------------------
Node: BIOLOGICAL PROCESS

No: 3 in centrality overall ranking

Specificity:

Number of connections: 25

No: 79 in Degree Centrality ranking
No: 30 in Closeness Centrality ranking
No: 38 in Betweeness Centrality ranking
No: 74 in Pagerank Centrality ranking

------------------
Node: PROTEIN MEASUREMENT

No: 4 in centrality overall ranking

Specificity:

Number of connections: 27

No: 137 in Degree Centrality ranking
No: 25 in Closeness Centrality ranking
No: 20 in Betweeness Centrality ranking
No: 108 in Pagerank Centrality ranking

------------------
Node: DIABETES MELLITUS

No: 5 in centrality overall ranking

Specificity:

Number of connections: 17

No: 114 in Degree Centrality ranking
No: 118 in Closeness Centrality ranking
No: 28 in Betweeness Centrality ranking
No: 107 in Pagerank Centrality ranking

------------------
Node: COLORECTAL CANCER

No: 6 in centrality overall ranking

Specificity:

Number of connections: 19

No: 142 in Degree Centrality ranking
No: 88 in Closeness Centrality ranking
No: 27 in Betweeness Centrality ranking
No: 157 in Pagerank Centrality ranking

------------------
Node: NON-SMALL CELL LUNG CARCINOMA

No: 7 in centrality overall ranking

Specificity:

Number of connections: 16

No: 105 in Degree Centrality ranking
No: 160 in Closeness Centrality ranking
No: 31 in Betweeness Centrality ranking
No: 118 in Pagerank Centrality ranking

------------------
Node: LIVER DISEASE

No: 8 in centrality overall ranking

Specificity:

Number of connections: 21

No: 149 in Degree Centrality ranking
No: 64 in Closeness Centrality ranking
No: 47 in Betweeness Centrality ranking
No: 156 in Pagerank Centrality ranking

------------------
Node: AUTOIMMUNE DISEASE

No: 9 in centrality overall ranking

Specificity:

Number of connections: 21

No: 206 in Degree Centrality ranking
No: 57 in Closeness Centrality ranking
No: 21 in Betweeness Centrality ranking
No: 218 in Pagerank Centrality ranking

------------------
Node: SQUAMOUS CELL CARCINOMA

No: 10 in centrality overall ranking

Specificity:

Number of connections: 17

No: 167 in Degree Centrality ranking
No: 135 in Closeness Centrality ranking
No: 33 in Betweeness Centrality ranking
No: 177 in Pagerank Centrality ranking
In [155]:
centrality_ranked_g,total_ranking_g = overall_ranking(Graph, Graph_centrality_w,10, Graph_infos,genes)
OVERALL RANKING

------------------
Node: LCAT

No: 1 in centrality overall ranking

Specificity:

Number of connections: 1188

No: 5 in Degree Centrality ranking
No: 2 in Closeness Centrality ranking
No: 6 in Betweeness Centrality ranking
No: 4 in Pagerank Centrality ranking

------------------
Node: SCN9A

No: 2 in centrality overall ranking

Specificity:

Number of connections: 1103

No: 1 in Degree Centrality ranking
No: 3 in Closeness Centrality ranking
No: 12 in Betweeness Centrality ranking
No: 2 in Pagerank Centrality ranking

------------------
Node: SLC18A2

No: 3 in centrality overall ranking

Specificity:

Number of connections: 1041

No: 4 in Degree Centrality ranking
No: 5 in Closeness Centrality ranking
No: 7 in Betweeness Centrality ranking
No: 5 in Pagerank Centrality ranking

------------------
Node: PDGFD

No: 4 in centrality overall ranking

Specificity:

Number of connections: 818

No: 6 in Degree Centrality ranking
No: 7 in Closeness Centrality ranking
No: 3 in Betweeness Centrality ranking
No: 8 in Pagerank Centrality ranking

------------------
Node: COVID

No: 5 in centrality overall ranking

Specificity:

Number of connections: 7217

No: 22 in Degree Centrality ranking
No: 1 in Closeness Centrality ranking
No: 1 in Betweeness Centrality ranking
No: 1 in Pagerank Centrality ranking

------------------
Node: CALD1

No: 6 in centrality overall ranking

Specificity:

Number of connections: 1097

No: 7 in Degree Centrality ranking
No: 4 in Closeness Centrality ranking
No: 9 in Betweeness Centrality ranking
No: 7 in Pagerank Centrality ranking

------------------
Node: FDFT1

No: 7 in centrality overall ranking

Specificity:

Number of connections: 753

No: 13 in Degree Centrality ranking
No: 8 in Closeness Centrality ranking
No: 2 in Betweeness Centrality ranking
No: 13 in Pagerank Centrality ranking

------------------
Node: TCEA1

No: 8 in centrality overall ranking

Specificity:

Number of connections: 527

No: 3 in Degree Centrality ranking
No: 11 in Closeness Centrality ranking
No: 19 in Betweeness Centrality ranking
No: 6 in Pagerank Centrality ranking

------------------
Node: CDC20

No: 9 in centrality overall ranking

Specificity:

Number of connections: 529

No: 10 in Degree Centrality ranking
No: 10 in Closeness Centrality ranking
No: 8 in Betweeness Centrality ranking
No: 12 in Pagerank Centrality ranking

------------------
Node: FKBP10

No: 10 in centrality overall ranking

Specificity:

Number of connections: 360

No: 14 in Degree Centrality ranking
No: 15 in Closeness Centrality ranking
No: 5 in Betweeness Centrality ranking
No: 14 in Pagerank Centrality ranking

4. Graph Representation

In [181]:
plt.figure(figsize=(15,15))

# Load
layout = np.load('layout_nx.npy',allow_pickle='TRUE').item()

# draw base etwork
nx.draw_networkx(Graph, pos = layout, node_size = 30, with_labels = False, node_color = 'grey', edge_color = '0.7')

# add bigger nodes in color
nodelist = list(Graph.nodes())
nodelist_d =  list(diseases)
nodelist_g = list(genes)

nx.draw_networkx_nodes(Graph,pos=layout,nodelist= nodelist_d, node_color='red', node_size = 30)
nx.draw_networkx_nodes(Graph,pos=layout,nodelist= nodelist_g, node_color='green', node_size = 30)



#plt.title(name + ': '+node_type, fontsize = 20)
plt.savefig('full_network.png')
plt.axis("off")
plt.show()
In [38]:
def draw_graph_top(G,centrality_ranked,n, layout, name, node_list, node_type):
    
    plt.figure(figsize=(15,15))

    
    # draw base etwork 
    nx.draw_networkx(G, pos = layout, node_size = 30, with_labels = False, node_color = 'grey', edge_color = '0.7')
    
    # add bigger nodes in color
    top_nodes = list(centrality_ranked.keys())[:n]
    labels = {i: i for i in top_nodes}
    colors = ['#1ECD3F', '#0C437E', '#30F77B', '#E0016D', '#E0C6F4', '#A402CF', '#6C5020', '#014F05', '#3636DE', '#0E82F6']
    big_size = 3000
    sizes = [big_size/(i+1) for i in range(n)]
        
    nx.draw_networkx_nodes(G,pos=layout,nodelist= top_nodes, with_labels = True, node_color=colors, node_size = sizes)
        
    nx.draw_networkx_labels(G,layout, labels,font_size=16, font_color = 'k') 
    
    
    plt.title(name + ': '+node_type, fontsize = 20)
    plt.savefig(name+'_'+node_type+'_centrality.png')
    plt.axis("off")
    plt.show()
In [39]:
# Load
layout = np.load('layout_nx.npy',allow_pickle='TRUE').item()
In [40]:
for cen in centrality_ranked_g:
    draw_graph_top(Graph,centrality_ranked_g[cen],10, layout, str(cen),genes, 'GENES')
In [41]:
for cen in centrality_ranked_d:
    draw_graph_top(Graph,centrality_ranked_d[cen],10, layout, str(cen),diseases, 'DISEASE')

   
In [183]:
total_ranking_d
Out[183]:
[('bone disease', 132),
 ('psychiatric disorder', 157),
 ('biological process', 221),
 ('protein measurement', 290),
 ('diabetes mellitus', 367),
 ('colorectal cancer', 414),
 ('non-small cell lung carcinoma', 414),
 ('liver disease', 416),
 ('autoimmune disease', 502),
 ('squamous cell carcinoma', 512),
 ('Acute Leukemia', 618),
 ('blood protein measurement', 649),
 ('uterine disease', 908),
 ('colorectal adenocarcinoma', 946),
 ('injury, poisoning or other complication', 977),
 ('Abnormality of nervous system physiology', 1010),
 ('Abnormality of the cardiovascular system', 1027),
 ('Central Nervous System Neoplasm', 1109),
 ('immunodeficiency disease', 1123),
 ('biliary tract disease', 1223),
 ('hepatocellular carcinoma', 1279),
 ('Abnormal eye movements', 1476),
 ('Digestive System Carcinoma', 1479),
 ('arthritis', 1483),
 ('joint disease', 1527),
 ('bone inflammation disease', 1550),
 ('Abnormality of skeletal morphology', 1567),
 ('Anomaly of puberty or/and menstrual cycle of genetic origin', 1704),
 ('Abnormality of the digestive system', 1710),
 ('Disorder of lipid metabolism', 1772),
 ('renal system measurement', 1779),
 ('Developmental anomaly of metabolic origin', 1820),
 ('esophageal adenocarcinoma', 1856),
 ('Abnormality of connective tissue', 1876),
 ('Combined T and B cell immunodeficiency', 1907),
 ('Chromosomal anomaly', 1917),
 ('Autosomal ichthyosis syndrome', 1926),
 ('Genetic tumor of hematopoietic and lymphoid tissues', 1949),
 ('B-cell neoplasm', 1957),
 ('B-cell non-Hodgkins lymphoma', 1985),
 ('Abnormality of the abdominal organs', 2008),
 ("Alzheimer's disease", 2048),
 ('Abnormality of the liver', 2142),
 ('Abnormality of the urinary system', 2156),
 ('Genetic multiple congenital anomalies/dysmorphic syndrome', 2159),
 ('Genetic peripheral neuropathy', 2161),
 ('Genetic neurodegenerative disease', 2172),
 ('Autosomal anomaly', 2188),
 ('CNS demyelinating autoimmune disease', 2203),
 ('Ataxia with dementia', 2251),
 ('Abnormality of the respiratory system', 2255),
 ('Inborn errors of metabolism', 2315),
 ('Genetic neuromuscular disease', 2400),
 ('Genetic developmental defect of the eye', 2473),
 ('Genetic overgrowth/obesity syndrome', 2481),
 ('Genetic head and neck malformation', 2517),
 ('Disorder of carbohydrate metabolism', 2540),
 ('Hepatobiliary Neoplasm', 2542),
 ('Disorder of energy metabolism', 2545),
 ('Genetic skeletal muscle disease', 2568),
 ('Congenital hypogonadotropic hypogonadism', 2604),
 ('Genetic obesity', 2625),
 ('Genetic central nervous system malformation', 2655),
 ('Abnormality of lipid metabolism', 2678),
 ('Genetic lens and zonula anomaly', 2689),
 ('Abnormality of the vasculature', 2711),
 ('FEV/FEC ratio', 2714),
 ('Genetic vitreous-retinal disease', 2745),
 ('Genetic neuro-ophthalmological disease', 2765),
 ('Autosomal dominant cerebellar ataxia', 2793),
 ('Combined dystonia', 2825),
 ('Autosomal dominant cerebellar ataxia type 1', 2829),
 ('Constitutional symptom', 2862),
 ('Genetic dementia', 2867),
 ('Abnormality of cardiovascular system morphology', 2899),
 ('ulcerative colitis', 2908),
 ('Genetic neurodegenerative disease with dementia', 2913),
 ('Endometrial Endometrioid Adenocarcinoma', 2944),
 ('Disorder of purine or pyrimidine metabolism', 2949),
 ('small cell lung carcinoma', 2960),
 ('Genetic infertility', 2973),
 ('Inherited cancer-predisposing syndrome', 3007),
 ('Malignant epithelial tumor of ovary', 3035),
 ('Agitation', 3036),
 ('Disorder of amino acid and other organic acid metabolism', 3048),
 ('Epilepsy syndrome', 3069),
 ('Genetic syndrome with a central nervous system malformation as major feature',
  3070),
 ('type I diabetes mellitus', 3090),
 ('Abnormal cardiovascular system physiology', 3099),
 ('Abnormality of skin morphology', 3140),
 ('Abdominal symptom', 3163),
 ('Malignant Urinary System Neoplasm', 3167),
 ('Adolescent-onset epilepsy syndrome', 3169),
 ('Genetic subcutaneous tissue disorder', 3178),
 ('Phenotypic abnormality', 3179),
 ('Genetic syndromic Pierre Robin syndrome', 3192),
 ('autism', 3236),
 ('Progressive supranuclear palsy', 3238),
 ('Cholecystitis', 3242),
 ('Metabolic disease associated with ocular features', 3242),
 ('Multiple congenital anomalies/dysmorphic syndrome-intellectual disability',
  3242),
 ('Benign Smooth Muscle Neoplasm', 3245),
 ('ARX-related epileptic encephalopathy', 3252),
 ('Genetic macular dystrophy', 3272),
 ('Atrophy', 3286),
 ('Genetic urogenital tract malformation', 3292),
 ('Abnormality of the ear', 3321),
 ('Genetic multiple congenital anomalies/dysmorphic syndrome - variable intellectual disability',
  3322),
 ('Genetic multiple congenital anomalies/dysmorphic syndrome without intellectual disability',
  3336),
 ('Abnormality of the bladder', 3367),
 ('Genetic renal or urinary tract malformation', 3369),
 ('Genetic congenital limb malformation', 3388),
 ('Autosomal monosomy', 3408),
 ('Arthrogryposis syndrome', 3451),
 ('Genetic syndrome with limb malformations as a major feature', 3470),
 ('Rare genetic developmental defect during embryogenesis', 3486),
 ('Rare genetic neurological disorder', 3490),
 ('Nephropathy secondary to a storage or other metabolic disease', 3505),
 ('Genetic soft tissue tumor', 3518),
 ('Rare genetic eye disease', 3518),
 ('Fish-eye disease', 3528),
 ('Genetic epidermal disorder', 3532),
 ('C-reactive protein measurement', 3535),
 ('Genetic lipodystrophy', 3559),
 ('Rare genetic tumor', 3571),
 ('Genetic syndrome with a cerebellar malformation as major feature', 3580),
 ('Rare genetic endocrine disease', 3632),
 ('colitis', 3634),
 ('Abnormality of the eye', 3649),
 ('Agammaglobulinemia', 3653),
 ('Cystic Kidney Disease', 3657),
 ('Acute hepatic porphyria', 3660),
 ('Familial LCAT deficiency', 3661),
 ('Cerebral organic aciduria', 3711),
 ('cognitive function measurement', 3719),
 ('rheumatoid arthritis', 3735),
 ('Rare genetic renal disease', 3736),
 ('Genetic motor neuron disease', 3739),
 ('sepsis', 3748),
 ('Rare genetic skin disease', 3752),
 ('Familial cystic renal disease', 3769),
 ('Familial partial lipodystrophy', 3783),
 ('Genetic malformation syndrome with short stature', 3797),
 ('Rare genetic intellectual disability', 3798),
 ('Metabolic disease with corneal opacity', 3800),
 ('Childhood-onset epilepsy syndrome', 3818),
 ('HIV infection', 3820),
 ('Dysostosis of genetic origin', 3828),
 ('Glycogen storage disease', 3834),
 ('Rare genetic intellectual disability with developmental anomaly', 3838),
 ('Fibroblastic Neoplasm', 3854),
 ('Autosomal recessive hereditary sensory and autonomic neuropathy', 3862),
 ('Hernia', 3864),
 ('Genetic hypertension', 3869),
 ('BMI-adjusted waist-hip ratio', 3888),
 ("Crohn's disease", 3892),
 ('Immune dysregulation disease with immunodeficiency', 3907),
 ('Hypoalphalipoproteinemia', 3911),
 ('Primary immunodeficiency', 3913),
 ('Primary immunodeficiency due to a defect in adaptive immunity', 3923),
 ('Dysostosis of genetic origin with limb anomaly as a major feature', 3937),
 ('Genetic epidermal appendage anomaly', 3949),
 ('Neurometabolic disease', 3963),
 ('multiple myeloma', 3972),
 ('B cell deficiency', 3996),
 ('Inherited ichthyosis', 4025),
 ('Abdominal Aortic Aneurysm', 4055),
 ('Genetic central nervous system and retinal vascular disease', 4055),
 ('Rare genetic bone disease', 4060),
 ('Rare genetic bone development disorder', 4072),
 ('Rare genetic hematologic disease', 4076),
 ('Abnormal glucose homeostasis', 4079),
 ('Malignant Pancreatic Neoplasm', 4086),
 ('Rare genetic epilepsy', 4088),
 ('Disorder of phospholipids, sphingolipids and fatty acids biosynthesis',
  4102),
 ('Rare genetic immune disease', 4111),
 ('Rare genetic movement disorder', 4144),
 ('Early-onset non-syndromic cataract', 4146),
 ('LCAT deficiency', 4148),
 ('Rare genetic tremor disorder', 4162),
 ('Rare otorhinolaryngological malformation', 4168),
 ('Abnormal digit morphology', 4194),
 ('Bardet-Biedl syndrome', 4199),
 ('Rare constitutional anemia', 4201),
 ('Metabolic myopathy', 4208),
 ('Inherited ichthyosis syndromic form', 4212),
 ('Rare genetic systemic or rheumatologic disease', 4213),
 ('Genodermatosis with ocular features', 4215),
 ('Abnormality of gastrointestinal vasculature', 4221),
 ('adenocarcinoma', 4232),
 ('Genetic parenchymatous liver disease', 4238),
 ('Muscular lipidosis', 4256),
 ('invasive breast ductal carcinoma', 4259),
 ('Primary bone dysplasia', 4269),
 ('Congenital disorder of glycosylation', 4299),
 ('Non-dystrophic myopathy', 4301),
 ('Lentivirus Infections', 4304),
 ('Rare cataract', 4307),
 ('anthropometric measurement', 4318),
 ('Rare dyslipidemia', 4329),
 ('Abnormality of blood glucose concentration', 4342),
 ('Breast Carcinoma by Gene Expression Profile', 4342),
 ('Lysosomal disease', 4358),
 ('Berardinelli-Seip congenital lipodystrophy', 4361),
 ('Genetic intestinal disease', 4364),
 ('Rare genetic cardiac disease', 4372),
 ('Genetic non-syndromic obesity', 4390),
 ('Autosomal recessive cerebellar ataxia', 4411),
 ('DNA repair deficiency', 4446),
 ('Huntington disease and related disorders', 4446),
 ('Pituitary deficiency', 4449),
 ('Chronic mucocutaneous candidosis', 4452),
 ('Non-acquired pituitary hormone deficiency', 4453),
 ('Lipid storage disease', 4459),
 ('brain disease', 4480),
 ('Genetic progeroid syndrome', 4501),
 ('Huntington disease-like syndrome', 4504),
 ('Genetic renal tubular disease', 4508),
 ('Disorder of purine metabolism', 4513),
 ('Syndrome or malformation associated with head and neck malformations',
  4517),
 ('cancer', 4521),
 ('Huntington disease', 4525),
 ('Hernia of the abdominal wall', 4526),
 ('body weights and measures', 4527),
 ('Rare hereditary disease with peripheral neuropathy', 4539),
 ('Rare hereditary metabolic disease with peripheral neuropathy', 4539),
 ('Motor stereotypies', 4540),
 ('acute myeloid leukemia', 4543),
 ('Monogenic disease with epilepsy', 4545),
 ('cell proliferation disorder', 4549),
 ('carcinoma', 4550),
 ('Erythromelalgia', 4555),
 ('central nervous system disease', 4583),
 ('Coronary artery disease - hyperlipidemia - hypertension - diabetes - osteoporosis',
  4588),
 ('breast disease', 4592),
 ('Rare genetic palpebral, lacrimal system and conjunctival disease', 4594),
 ('arterial disorder', 4597),
 ('Rare disease with autism', 4600),
 ('breast neoplasm', 4600),
 ('breast cancer', 4605),
 ('breast carcinoma', 4610),
 ('Autosomal dominant hereditary sensory and autonomic neuropathy', 4615),
 ('Alopecia', 4622),
 ('Borderline personality disorder', 4650),
 ('Familial partial lipodystrophy, Köbberling type', 4657),
 ('Rare genetic hepatic disease', 4670),
 ('bone marrow neoplasm', 4680),
 ('bone marrow disease', 4683),
 ('Syndromic obesity', 4686),
 ("Barrett's esophagus", 4698),
 ('Rare genetic diabetes mellitus', 4701),
 ("Parkinson's disease", 4706),
 ('Genetic hair anomaly', 4708),
 ('Genetic gynecological tumor', 4712),
 ('Autosomal recessive non-syndromic intellectual disability', 4713),
 ('Primary immunodeficiency due to a defect in innate immunity', 4730),
 ('astrocytoma', 4735),
 ('bone neoplasm', 4759),
 ('Late-onset ataxia with dementia', 4762),
 ('abnormality of brain morphology', 4764),
 ('abnormality of the nervous system', 4765),
 ('Nervous system anomaly with eye involvement', 4773),
 ('Genetic digestive tract malformation', 4774),
 ('autosomal genetic disease', 4775),
 ('Rare strabismus and restriction syndrome', 4782),
 ('X-linked disease', 4791),
 ('Retinal dystrophy', 4795),
 ('Rare genetic deafness', 4796),
 ('Autosomal recessive congenital ichthyosis', 4814),
 ('Bruck syndrome', 4818),
 ('connective tissue disease', 4819),
 ('Familial hemophagocytic lymphohistiocytosis', 4823),
 ('brain neoplasm', 4826),
 ('Cataract, Hutterite type', 4837),
 ('Malignant Mixed Neoplasm', 4837),
 ('Mitochondrial oxidative phosphorylation disorder', 4838),
 ('body height', 4839),
 ('Rare genetic gynecological and obstetrical diseases', 4840),
 ('Mitochondrial disease', 4851),
 ('Rare genetic male infertility', 4860),
 ('autosomal recessive disease', 4873),
 ('Rare male infertility due to hypothalamic-pituitary-gonadal axis disorder of genetic origin',
  4880),
 ('Oculomotor palsy', 4886),
 ('bone marrow cancer', 4888),
 ('bone cancer', 4891),
 ('Abnormality of refraction', 4893),
 ('Genetic photodermatosis', 4905),
 ('central nervous system cancer', 4908),
 ('Autosomal dominant progressive nephropathy with hypertension', 4911),
 ('Autosomal dominant non-syndromic intellectual disability', 4914),
 ('Miscellaneous movement disorder due to genetic neurodegenerative disease',
  4917),
 ('congenital abnormality', 4920),
 ('Cerebral malformation with epilepsy', 4931),
 ('body mass index', 4933),
 ('Burkitts lymphoma', 4945),
 ('Rare pervasive developmental disorder', 4946),
 ('anemia (disease)', 4946),
 ('Adrenogenital syndrome', 4954),
 ('Mitochondrial oxidative phosphorylation disorder due to nuclear DNA anomalies',
  4956),
 ('Disorder of porphyrin and haem metabolism', 4959),
 ('Beta-thalassemia and related diseases', 4963),
 ('Disorder of mineral absorption and transport', 4968),
 ('Channelopathy with epilepsy', 4975),
 ('complete blood cell count', 4978),
 ('Disorder of metabolite absorption and transport', 4982),
 ('Autosomal dominant pure spastic paraplegia', 4991),
 ('Inguinal hernia', 4994),
 ('Rare genetic urogenital disease', 4999),
 ('Leukodystrophy', 5008),
 ('Autosomal recessive complex spastic paraplegia', 5023),
 ('Hyperlipoproteinemia type 1', 5023),
 ('Genetic pigmentation anomaly of the skin', 5025),
 ('Genetic vascular anomaly', 5031),
 ('connective tissue neoplasm', 5032),
 ('endocrine system disease', 5033),
 ('Genetic frontotemporal degeneration with dementia', 5034),
 ('Major hypertriglyceridemia', 5037),
 ('Ectodermal dysplasia syndrome', 5039),
 ('abnormality of the immune system', 5044),
 ('Syndromic developmental defect of the eye', 5048),
 ('Hematological disorder with renal involvement', 5051),
 ('disease of visual system', 5052),
 ('developmental defect during embryogenesis', 5053),
 ('disease of genitourinary system', 5056),
 ('bone measurement', 5060),
 ('Acrocephalosyndactyly', 5066),
 ('digestive system neoplasm', 5071),
 ('epithelial neoplasm', 5073),
 ('Functional abnormality of the gastrointestinal tract', 5077),
 ('Syndrome with a symptomatic strabismus', 5080),
 ('autosomal dominant disease', 5081),
 ('quantitative and/or qualitative congenital phagocyte defect', 5083),
 ('Disorder of carbohydrate absorption and transport', 5091),
 ('developmental disorder of mental health', 5104),
 ('endocrine neoplasm', 5107),
 ('Rare genetic hypothalamic or pituitary disease', 5110),
 ('Abnormality of the head', 5112),
 ('Headache', 5113),
 ('Channelopathy-associated congenital insensitivity to pain', 5115),
 ('Craniofacial anomaly with cataract', 5119),
 ('chronic myeloproliferative disorder', 5119),
 ('Common variable immunodeficiency', 5120),
 ('Fragile X syndrome', 5128),
 ('Autosomal recessive metabolic cerebellar ataxia', 5130),
 ('congenital nervous system disorder', 5140),
 ('Rare genetic dystonia', 5143),
 ('Familial apolipoprotein C-II deficiency', 5144),
 ('Coralliform cataract', 5147),
 ('Genetic keratinization disorder associated with ocular features', 5153),
 ('digestive system cancer', 5154),
 ('Familial partial lipodystrophy associated with PLIN1 mutations', 5157),
 ('Chondrodysplasia punctata', 5158),
 ('Charcot-Marie-Tooth disease', 5164),
 ('bone development disease', 5165),
 ('Immunodeficiency syndrome with autoimmunity', 5178),
 ('Primary lipodystrophy', 5182),
 ('connective tissue cancer', 5190),
 ('Familial partial epilepsy', 5191),
 ('colorectal neoplasm', 5191),
 ('Frontotemporal neurodegeneration with movement disorder', 5198),
 ('Autosomal trisomy', 5203),
 ('Syndrome associated with Pierre Robin syndrome', 5203),
 ('Genetic hyperpigmentation of the skin', 5221),
 ('genetic disorder', 5222),
 ('eye disease', 5226),
 ('Early-onset ataxia with dementia', 5227),
 ('Rare genetic female infertility', 5227),
 ('Syndromic genetic deafness', 5231),
 ('genetic, familial or congenital disease', 5232),
 ('abnormality of metabolism/homeostasis', 5237),
 ('cataract', 5239),
 ('Disorder of copper metabolism', 5251),
 ('Complex hereditary spastic paraplegia', 5253),
 ('Primary bone dysplasia with defective bone mineralization', 5254),
 ('gastrointestinal disease', 5260),
 ('Cough', 5262),
 ('Malformation syndrome with skin/mucosae involvement', 5271),
 ('bone fracture related measurement', 5278),
 ('Hemoglobinopathy', 5284),
 ('Rare hypolipidemia', 5287),
 ('endocrine pancreas disease', 5289),
 ('Non-syndromic developmental defect of the eye', 5291),
 ('Rare metabolic liver disease', 5297),
 ('Craniosynostosis', 5301),
 ('Genetic immune deficiency with skin involvement', 5313),
 ('cognitive disorder', 5314),
 ('abnormality of higher mental function', 5315),
 ('relapsing-remitting multiple sclerosis', 5315),
 ('Anterior polar cataract', 5316),
 ('Cryptosporidiosis - chronic cholangitis - liver disease', 5319),
 ('Congenital disorder of glycosylation with neurological involvement', 5324),
 ('Connective tissue disease with eye involvement', 5330),
 ('Congenital intestinal transport defect', 5335),
 ('Disorder of iron metabolism and transport', 5339),
 ('colorectal carcinoma', 5346),
 ('Dent disease', 5355),
 ('Neurodegenerative disease with chorea', 5359),
 ('Syndromic cataract', 5359),
 ('behavior', 5364),
 ('epithelial skin neoplasm', 5366),
 ('glandular cell neoplasm', 5369),
 ('bone density', 5370),
 ('Kinetic eyelid anomaly', 5373),
 ('Familial dilated cardiomyopathy', 5400),
 ('hematologic disease', 5402),
 ('adenocarcinoma of liver and intrahepatic biliary tract', 5403),
 ('Syndromic renal or urinary tract malformation', 5408),
 ('female reproductive system disease', 5408),
 ('hematological measurement', 5421),
 ('Rare hereditary ataxia', 5429),
 ('Genetic thrombotic microangiopathy', 5431),
 ('DNA repair defect other than combined T-cell and B-cell immunodeficiencies',
  5433),
 ('Glycogen storage disease due to liver phosphorylase kinase deficiency',
  5439),
 ('Lymphoproliferative syndrome', 5440),
 ('Glucose transport disorder', 5446),
 ('Benign familial neonatal seizures', 5454),
 ('Genetic susceptibility to infections due to particular pathogens', 5455),
 ('hereditary connective tissue disorder', 5455),
 ('Dent disease type 1', 5460),
 ('Unclassified primitive or secondary maculopathy', 5468),
 ('Rare palpebral disease', 5474),
 ('abnormality of blood and blood-forming tissues', 5476),
 ('Autosomal dominant isolated diffuse palmoplantar keratoderma', 5479),
 ('Sleep Disorder', 5485),
 ('Early infantile epileptic encephalopathy', 5492),
 ('HER2 Positive Breast Carcinoma', 5503),
 ('Rare insulin-resistance syndrome', 5509),
 ('abnormality of the skeletal system', 5511),
 ('hematopoietic and lymphoid system neoplasm', 5519),
 ('immune system disease', 5519),
 ('hematopoietic and lymphoid cell neoplasm', 5530),
 ('Dentocutaneous disease with cataract', 5534),
 ('Eyebrow/eyelashes structural anomaly', 5540),
 ('genetic skin disease', 5544),
 ('Epstein-Barr virus-related tumor', 5549),
 ('carbohydrate metabolism disease', 5550),
 ('female reproductive system neoplasm', 5555),
 ('bladder disease', 5561),
 ('Hyperlipoproteinemia type 4', 5562),
 ('hepatobiliary disease', 5564),
 ('CHILD syndrome', 5569),
 ('benign neoplasm', 5573),
 ('integumentary system disease', 5573),
 ('gonadal disease', 5576),
 ('Ichthyosis associated with ocular features', 5577),
 ('heart disease', 5578),
 ('FTH1-related iron overload', 5582),
 ('Congenital intestinal motility disorder', 5590),
 ('Spinocerebellar ataxia with oculomotor anomaly', 5593),
 ('cardiovascular disease biomarker measurement', 5594),
 ('female reproductive organ cancer', 5595),
 ('Immunodeficiency predominantly affecting antibody production', 5601),
 ('Dravet syndrome', 5603),
 ('Rare disorder with hypogonadotropic hypogonadism', 5604),
 ('cardiovascular measurement', 5605),
 ('Organic aciduria', 5608),
 ('Metabolic disease with dementia', 5609),
 ('Abnormality of head or neck', 5612),
 ('autoimmune disease of the nervous system', 5612),
 ('3-methylglutaconic aciduria', 5616),
 ('depressive disorder', 5616),
 ('Primary hemophagocytic lymphohistiocytosis', 5617),
 ('Genetic skin tumor', 5618),
 ('Rare constitutional hemolytic anemia', 5619),
 ('Severe combined immunodeficiency', 5620),
 ('auditory system disease', 5627),
 ('Classic organic aciduria', 5629),
 ('Genetic chronic primary adrenal insufficiency', 5630),
 ('Familial restrictive cardiomyopathy', 5637),
 ('glucose metabolism disease', 5639),
 ('Genetic non-syndromic central nervous system malformation', 5646),
 ('Partial autosomal monosomy', 5652),
 ('Disorder of peroxisomal alpha-, beta- and omega-oxidation', 5656),
 ('Herpesviridae infectious disease', 5662),
 ('basal ganglia disease', 5666),
 ('Asperger syndrome', 5673),
 ('Genetic cerebral malformation', 5674),
 ('Autosomal dominant optic atrophy', 5680),
 ('Disorder of glycolysis', 5682),
 ('Non-syndromic congenital cataract', 5682),
 ('glioma', 5686),
 ('Epstein-Barr virus-associated malignant lymphoproliferative disorder',
  5689),
 ('Supranuclear oculomotor palsy', 5695),
 ('2-methylbutyryl-CoA dehydrogenase deficiency', 5697),
 ('Infantile epilepsy syndrome', 5703),
 ('Nausea and vomiting', 5703),
 ('carcinoma of liver and intrahepatic biliary tract', 5714),
 ('colonic disease', 5720),
 ('epilepsy', 5721),
 ('Rare hyperlipidemia', 5727),
 ('coronary heart disease', 5734),
 ('Autosomal dominant intermediate Charcot-Marie-Tooth disease type E', 5738),
 ('Rare genetic coagulation disorder', 5742),
 ('Colobomatous microphthalmia', 5744),
 ('glioblastoma multiforme', 5745),
 ('kidney disease', 5748),
 ('infectious disease', 5762),
 ('Genetic disorder of sex development', 5765),
 ('Homozygous familial hypercholesterolemia', 5767),
 ('central nervous system malformation', 5768),
 ('disease of central nervous system or retinal vasculature', 5771),
 ('Congenital alveolar capillary dysplasia', 5781),
 ('autoimmune disease of central nervous system', 5782),
 ('Cerulean cataract', 5783),
 ('Abnormality of mitochondrial metabolism', 5789),
 ('clear cell renal carcinoma', 5792),
 ('MODY', 5798),
 ('Abnormality of cardiovascular system electrophysiology', 5801),
 ('cognition', 5803),
 ('measurement', 5805),
 ('alcohol drinking', 5812),
 ('hypersensitivity reaction disease', 5814),
 ('Metabolic disease with skin involvement', 5816),
 ('cerebrovascular disorder', 5822),
 ('integumentary system cancer', 5830),
 ('Persistent combined dystonia', 5833),
 ('cerebellar disease', 5835),
 ('Hereditary spastic paraplegia', 5843),
 ('immune system cancer', 5846),
 ('lower respiratory tract disease', 5847),
 ('Calcium channel blocker use measurement', 5848),
 ('Genetic cranial malformation', 5849),
 ('asthma', 5849),
 ('high grade malignant neoplasm', 5849),
 ('Intestinal malformation', 5854),
 ('Intestinal Type Adenocarcinoma', 5855),
 ('head and neck neoplasia', 5856),
 ('digestive system infectious disease', 5860),
 ('Immunodeficiency due to a complement cascade protein anomaly', 5862),
 ('intestinal disease', 5862),
 ('lung disease', 5862),
 ('drug use measurement', 5868),
 ('Eczema', 5869),
 ('Rare female infertility due to a congenital hypogonadotropic hypogonadism',
  5870),
 ('Metabolic disease with cataract', 5875),
 ('Ataxia', 5884),
 ('Autosomal recessive hyperinsulinism due to Kir6.2 deficiency', 5888),
 ('Atypical hemolytic-uremic syndrome', 5890),
 ('Autosomal dominant hyper-IgE syndrome', 5891),
 ('IGA glomerulonephritis', 5891),
 ('Autoimmune lymphoproliferative syndrome', 5893),
 ('Bloom syndrome', 5893),
 ('complex neurodevelopmental disorder', 5893),
 ('Invasive Breast Carcinoma', 5894),
 ('Generalized epilepsy with febrile seizures-plus', 5901),
 ('esophageal carcinoma', 5901),
 ('esophageal disease', 5907),
 ('Genetic cardiac rhythm disease', 5918),
 ('Action myoclonus - renal failure syndrome', 5919),
 ('leukemia', 5921),
 ('X-linked syndromic intellectual disability', 5934),
 ('Rare female infertility due to hypothalamic-pituitary-gonadal axis disorder of genetic origin',
  5946),
 ('clear cell adenocarcinoma', 5947),
 ('metabolic disease', 5953),
 ('Generalized tonic-clonic seizures', 5956),
 ('Aniridia', 5959),
 ('Autosomal recessive hyper-IgE syndrome', 5960),
 ('lymphoid neoplasm', 5960),
 ('Ectodermal malformation syndrome associated with ocular features', 5961),
 ('lung neoplasm', 5967),
 ('mental or behavioural disorder', 5967),
 ('Rare disorder with female infertility due to a congenital hypogonadotropic hypogonadism',
  5968),
 ('Mitochondrial disease with peripheral neuropathy', 5977),
 ('Muscular dystrophy', 5982),
 ('musculoskeletal or connective tissue disease', 5984),
 ('high grade astrocytic tumor', 5987),
 ('Congenital myopathy', 5989),
 ('lung cancer', 5991),
 ('Hereditary sensory and autonomic neuropathy type 2', 5995),
 ('musculoskeletal system disease', 5997),
 ('Syndromic neurometabolic disease with non-X-linked intellectual disability',
  5998),
 ('Autosomal agammaglobulinemia', 6002),
 ('Adams-Oliver syndrome', 6003),
 ('neoplasm', 6003),
 ('dermatitis', 6005),
 ('Systemic disease with cataract', 6006),
 ('intestinal neoplasm', 6009),
 ('abnormality of the skin', 6011),
 ('neoplastic disease or syndrome', 6014),
 ('Aicardi-Goutières syndrome', 6015),
 ('Lethal congenital contracture syndrome type 3', 6016),
 ('abnormality of the integument', 6021),
 ('gastric carcinoma', 6022),
 ('Benign essential blepharospasm', 6028),
 ('leukocyte count', 6028),
 ('Autosomal recessive infantile hypercalcemia', 6029),
 ('lung carcinoma', 6029),
 ('Antenatal Bartter syndrome', 6037),
 ('hypertension', 6040),
 ('nervous system disease', 6041),
 ('Polymalformative genetic syndrome with increased risk of developing cancer',
  6043),
 ('Abnormality of urine homeostasis', 6044),
 ('Agents acting on the renin-angiotensin system use measurement', 6045),
 ('Neutral lipid storage myopathy', 6047),
 ('head and neck malignant neoplasia', 6050),
 ('Rare genetic gastroenterological disease', 6051),
 ('anomaly of puberty or/and menstrual cycle', 6052),
 ('genetic otorhinolaryngologic disease', 6052),
 ('Malignant Germ Cell Tumor', 6055),
 ('Metabolic disease with pigmentary retinitis', 6056),
 ('intestinal cancer', 6057),
 ('endometrioid carcinoma', 6058),
 ('Autosomal dominant distal hereditary motor neuropathy', 6059),
 ('Autosomal recessive optic atrophy', 6063),
 ('Holoprosencephaly', 6063),
 ('colonic neoplasm', 6065),
 ('bronchial disease', 6066),
 ('Lethal congenital contracture syndrome', 6072),
 ('large intestine disease', 6077),
 ('male reproductive system disease', 6077),
 ('Bartter syndrome', 6087),
 ('Autosomal recessive syndromic optic atrophy', 6092),
 ('Extramammary Paget Disease', 6093),
 ('bladder tumor', 6095),
 ('serum non-albumin protein measurement', 6097),
 ('Genetic optic atrophy', 6102),
 ('neoplasm of thorax', 6102),
 ('Inherited non-syndromic ichthyosis', 6114),
 ('X-linked lymphoproliferative disease', 6115),
 ('Hemoglobinopathy Toms River', 6118),
 ('bacterial disease', 6119),
 ('Anophthalmia - microphthalmia', 6129),
 ('Eyebrow/eyelashes pigmentation anomaly', 6131),
 ('Mitochondrial disease with eye involvement', 6131),
 ('coronary artery disease', 6133),
 ('Genetic glomerular disease', 6147),
 ('bile duct disease', 6148),
 ('Congenital alacrima', 6149),
 ('Generalized congenital lipodystrophy with myopathy', 6152),
 ('erythrocyte measurement', 6156),
 ('disease of pilosebaceous unit', 6165),
 ('amyotrophic lateral sclerosis', 6169),
 ('erythrocyte indices', 6182),
 ('Niemann-Pick disease', 6185),
 ('gastric cancer', 6192),
 ('neutrophil percentage of leukocytes', 6192),
 ('Autosomal recessive hyper-IgE syndrome due to TYK2 deficiency', 6193),
 ('Peroxisomal disease', 6197),
 ('Hearing abnormality', 6198),
 ('Congenital dyserythropoietic anemia type I', 6201),
 ('malignant endocrine neoplasm', 6205),
 ('Genetic interstitial lung disease', 6207),
 ('Keratoconus', 6211),
 ('15q11q13 microduplication syndrome', 6212),
 ('anterior horn disease', 6216),
 ('nutritional or metabolic disease', 6217),
 ('17p11.2 microduplication syndrome', 6222),
 ('Major induction processes eye anomaly', 6222),
 ('Cerebral ischemia', 6225),
 ('Medullar aplasia', 6231),
 ('Autosomal recessive mendelian susceptibility to mycobacterial diseases due to a partial deficiency',
  6232),
 ('Coenzyme Q10 deficiency', 6239),
 ('Moyomoya angiopathy', 6239),
 ('head and neck carcinoma', 6240),
 ('Genetic malformation syndrome with odontal and/or periodontal component',
  6242),
 ('Rare familial disorder with hypertrophic cardiomyopathy', 6243),
 ('Autosomal recessive pure spastic paraplegia', 6246),
 ('eye degenerative disease', 6249),
 ('Adult-onset dystonia-parkinsonism', 6253),
 ('Congenital muscular dystrophy', 6255),
 ('4-hydroxybutyric aciduria', 6256),
 ('kidney neoplasm', 6259),
 ('male reproductive system neoplasm', 6261),
 ('Hallux valgus', 6269),
 ('Avascular necrosis of genetic origin', 6273),
 ('Genetic endocrine tumor', 6273),
 ('Neurocutaneous syndrome with epilepsy', 6288),
 ('Anorectal malformation', 6291),
 ('blood pressure', 6295),
 ('Rare non-syndromic cataract', 6304),
 ('Familial hyperinsulinism', 6305),
 ('colon carcinoma', 6306),
 ('Infantile hypophosphatasia', 6307),
 ('cirrhosis of liver', 6309),
 ('male reproductive organ cancer', 6314),
 ('bipolar disorder', 6319),
 ('behavioral abnormality', 6320),
 ('nervous system neoplasm', 6322),
 ('Conjunctival lymphangiectasia', 6324),
 ('neurodegenerative disease', 6325),
 ('Muscular glycogenosis', 6326),
 ('Cochleosaccular degeneration - cataract', 6331),
 ('muscular disease', 6333),
 ('Congenital dyserythropoietic anemia', 6338),
 ('DNA methylation', 6338),
 ('Moyamoya syndrome', 6341),
 ("Alzheimer's disease biomarker measurement", 6347),
 ('cognitive impairment', 6347),
 ('granulocyte count', 6353),
 ('monogenic disease', 6353),
 ('Ptosis', 6358),
 ('lens disease', 6364),
 ('lung adenocarcinoma', 6364),
 ('breast adenocarcinoma', 6372),
 ('Abnormality of muscle physiology', 6373),
 ('kidney cancer', 6376),
 ('musculoskeletal system cancer', 6377),
 ('benign prostatic hyperplasia', 6380),
 ('cranial nerve neuropathy', 6385),
 ('Rare genetic odontologic disease', 6388),
 ('lymphoid system disease', 6388),
 ('Autosomal ichthyosis syndrome with prominent hair abnormalities', 6389),
 ('multiple congenital anomalies/dysmorphic syndrome', 6389),
 ('Tourette syndrome', 6390),
 ('liver neoplasm', 6390),
 ('phenotype', 6390),
 ('biliary liver cirrhosis', 6391),
 ('Constitutional dyserythropoietic anemia', 6392),
 ('Hereditary sensory and autonomic neuropathy', 6400),
 ('lymphatic system disease', 6402),
 ('Constitutional neutropenia', 6403),
 ('anxiety disorder', 6403),
 ('malignant glioma', 6404),
 ('Autosomal dominant progressive external ophthalmoplegia', 6405),
 ('Epicanthal fold', 6407),
 ('Canthal anomaly', 6410),
 ('Neurodegeneration with brain iron accumulation', 6410),
 ('Cushing syndrome', 6411),
 ('Autosomal dominant nocturnal frontal lobe epilepsy', 6414),
 ('Hereditary motor and sensory neuropathy', 6417),
 ('acquired peripheral neuropathy', 6419),
 ('Niemann-Pick disease type E', 6422),
 ('cerebellar ataxia', 6422),
 ('myeloid neoplasm', 6423),
 ('muscle tissue disease', 6424),
 ('central nervous system infection', 6428),
 ('Idiopathic or cryptogenic familial epilepsy syndrome with identified loci/genes',
  6429),
 ('chronic lymphocytic leukemia', 6430),
 ('Acro-oto-ocular syndrome', 6431),
 ('myeloid hemopathy', 6435),
 ('demyelinating disease', 6444),
 ('Autosomal dominant optic atrophy and cataract', 6447),
 ('attention deficit hyperactivity disorder', 6448),
 ('Optic neuropathy', 6449),
 ('Acrofacial dysostosis, Rodríguez type', 6450),
 ('congenital limb malformation', 6453),
 ('Alpha-thalassemia', 6455),
 ('myeloid leukemia', 6455),
 ('fibrosis', 6457),
 ('lymphoid hemopathy', 6457),
 ('lymphoma', 6461),
 ('Rare intellectual disability without developmental anomaly', 6462),
 ('mouth disease', 6463),
 ('calcium metabolic disease', 6464),
 ('heel bone mineral density', 6467),
 ('nervous system cancer', 6471),
 ('Rare eyebrow/eyelashes anomaly', 6472),
 ('Niemann-Pick disease type B', 6477),
 ('neuropathy', 6481),
 ('pancreas disease', 6491),
 ('Acatalasemia', 6492),
 ('neuroepithelial neoplasm', 6492),
 ('Autosomal ichthyosis syndrome with fatal disease course', 6500),
 ('Congenital nephrotic syndrome, Finnish type', 6501),
 ('Obesity due to CEP19 deficiency', 6503),
 ('6-pyruvoyl-tetrahydropterin synthase deficiency', 6505),
 ('Syndromic urogenital tract malformation', 6509),
 ('drug dependence', 6511),
 ('Tendinopathy', 6515),
 ('Congenital isolated hyperinsulinism', 6517),
 ('Autosomal dominant hereditary axonal motor and sensory neuropathy', 6519),
 ('Adult-onset autosomal recessive sideroblastic anemia', 6521),
 ('drug-induced mental disorder', 6524),
 ('eosinophil count', 6526),
 ('carcinoma of esophagus', 6527),
 ('epithelial tumor of colon', 6530),
 ('Other immunodeficiency syndrome due to defects in adaptive immunity', 6531),
 ('Familial primary hyperparathyroidism', 6532),
 ('fat body mass', 6532),
 ('drinking behavior', 6535),
 ('Epstein-Barr virus infection', 6539),
 ('Sphingolipidosis', 6541),
 ('chronic leukemia', 6541),
 ('Alagille syndrome due to a NOTCH2 point mutation', 6543),
 ('Isolated dystonia', 6543),
 ('Isolated agammaglobulinemia', 6546),
 ('leukocyte disease', 6552),
 ('Hepatoerythropoietic porphyria', 6553),
 ('Metabolic disease with intestinal involvement', 6557),
 ('Autosomal dominant hyperinsulinism due to SUR1 deficiency', 6558),
 ('Acyl-CoA dehydrogenase deficiency', 6562),
 ('Autosomal dominant intermediate Charcot-Marie-Tooth disease', 6562),
 ('endometrial disease', 6564),
 ('Alopecia-intellectual disability syndrome', 6566),
 ('infertility', 6568),
 ('Syndromic intestinal malformation', 6569),
 ('cardiac arrhythmia', 6570),
 ('cardiac rhythm disease', 6571),
 ('Genetic renal tumor', 6572),
 ('Syndromic retinitis pigmentosa', 6575),
 ('Alpha-thalassemia - myelodysplastic syndrome', 6590),
 ('mood disorder', 6592),
 ('actinic keratosis', 6593),
 ('Alternating hemiplegia of childhood', 6599),
 ('nutritional disorder', 6603),
 ('Alopecia totalis', 6606),
 ('Autosomal dominant striatal neurodegeneration', 6615),
 ('Ovarian Endometrioid Adenocarcinoma', 6616),
 ('melanocytic neoplasm', 6616),
 ('Conjunctival vascular anomaly', 6633),
 ('non-melanoma skin carcinoma', 6636),
 ('ovarian carcinoma', 6637),
 ('alcohol-related disorders', 6644),
 ('melanoma', 6644),
 ('peripheral nervous system disease', 6646),
 ('respiratory or thoracic disease', 6648),
 ('breast ductal adenocarcinoma', 6649),
 ('ovarian disease', 6651),
 ('Alpha granule disease', 6654),
 ('dystonic disorder', 6657),
 ('Dense granule disease', 6658),
 ('disease of bone structure', 6659),
 ('Atypical hemolytic-uremic syndrome with DGKE deficiency', 6664),
 ('Atypical juvenile parkinsonism', 6664),
 ('Aortic arch anomaly - peculiar facies - intellectual disability', 6665),
 ('bone fracture', 6665),
 ('hearing loss', 6665),
 ('cardiomyopathy', 6666),
 ('movement disorder', 6670),
 ('Male infertility due to gonadal dysgenesis or sperm disorder', 6671),
 ('cerebellar degeneration', 6674),
 ('hearing disorder', 6674),
 ('inherited auditory system disease', 6675),
 ('inflammation', 6680),
 ('reproductive system or breast disease', 6680),
 ('Autosomal recessive malignant osteopetrosis', 6683),
 ('neuromuscular disease', 6683),
 ('Myopathy with eye involvement', 6687),
 ('Disorder of pterin metabolism', 6689),
 ('Leigh syndrome', 6689),
 ('Retinitis pigmentosa', 6691),
 ('Brain Stem Glioblastoma', 6692),
 ('Ovarian Endometrioid Adenocarcinoma with Squamous Differentiation', 6694),
 ('respiratory system disease', 6704),
 ('injury', 6705),
 ('Rare parkinsonian syndrome due to genetic neurodegenerative disease', 6709),
 ('inflammatory skin disease', 6710),
 ('Genetic hyperparathyroidism', 6717),
 ('Becker muscular dystrophy', 6719),
 ('Congenital dyserythropoietic anemia type II', 6721),
 ('Total congenital cataract', 6721),
 ('peripheral neuropathy', 6721),
 ('Rare genetic respiratory disease', 6724),
 ('Ataxia-telangiectasia-like disorder', 6733),
 ('endometrial cancer', 6740),
 ('myopathy', 6740),
 ('ovarian neoplasm', 6745),
 ('germ cell tumor', 6748),
 ('Congenital disorder of glycosylation with epilepsy as a major feature',
  6753),
 ('embryonal neoplasm', 6756),
 ('neuroendocrine neoplasm', 6756),
 ('cutaneous melanoma', 6757),
 ('genetic endocrine growth disease', 6759),
 ('Familial idiopathic steroid-resistant nephrotic syndrome', 6760),
 ('ovarian cancer', 6762),
 ('ovarian epithelial tumor', 6767),
 ('Mitochondrial oxidative phosphorylation disorder due to mitochondrial DNA anomalies',
  6772),
 ('cholelithiasis', 6773),
 ('reproductive system disease', 6775),
 ('Neonatal epilepsy syndrome', 6780),
 ('Lennox-Gastaut syndrome', 6783),
 ('Fanconi anemia', 6784),
 ('Isolated oxidative phosphorylation complex disorder', 6786),
 ('rare hematologic disease', 6791),
 ('Beta-thalassemia - X-linked thrombocytopenia', 6796),
 ('endometrium neoplasm', 6796),
 ('atrial fibrillation', 6797),
 ('endometrial neoplasm', 6800),
 ('obesity', 6800),
 ('Beta-thalassemia with other manifestations', 6802),
 ('biliary tract neoplasm', 6808),
 ('Orofacial clefting syndrome', 6810),
 ('Seizures', 6813),
 ('Partial deletion of chromosome 17', 6820),
 ('Orthomyxoviridae infectious disease', 6821),
 ('Diffuse palmoplantar keratoderma', 6828),
 ('Partial monosomy of the short arm of chromosome 17', 6828),
 ('skeletal system disease', 6830),
 ('Combined immunodeficiency with skin granulomas', 6833),
 ('Hypoglycemia', 6833),
 ('colon adenocarcinoma', 6833),
 ('Rare genetic parkinsonian disorder', 6834),
 ('Rare hemorrhagic disorder due to a constitutional thrombocytopenia', 6834),
 ('Disorder of phenylalanin or tyrosine metabolism', 6837),
 ('biliary tract cancer', 6839),
 ('endometrial carcinoma', 6841),
 ('cervix disease', 6849),
 ('Autosomal dominant congenital benign spinal muscular atrophy', 6853),
 ('reproductive system neoplasm', 6856),
 ('Rare genetic adrenal disease', 6857),
 ('Mitochondrial myopathy', 6858),
 ('respiratory system neoplasm', 6859),
 ('Leber congenital amaurosis', 6860),
 ('neurovascular disease', 6863),
 ('Chemotherapy-induced nausea and vomiting', 6864),
 ('Congenital hypothyroidism', 6864),
 ('Autosomal recessive progressive external ophthalmoplegia', 6865),
 ('bile duct neoplasm', 6866),
 ('Infantile dystonia-parkinsonism', 6867),
 ('Fanconi syndrome', 6869),
 ('Blackfan-Diamond anemia', 6870),
 ('respiratory tract neoplasm', 6872),
 ('skin disease', 6875),
 ('female infertility', 6877),
 ('age', 6878),
 ('anxiety', 6886),
 ('liver and intrahepatic bile duct neoplasm', 6886),
 ('Anomaly of the secretory and excretory apparatus of the lacrimal system',
  6889),
 ('acute pancreatitis', 6893),
 ('liver cancer', 6893),
 ('Familial long QT syndrome', 6894),
 ('respiratory system cancer', 6894),
 ('Genetic disorder of sex development of gynecological interest', 6896),
 ('overnutrition', 6896),
 ('cartilage disease', 6897),
 ('reproductive system cancer', 6898),
 ('Autosomal dominant hyperinsulinism due to Kir6.2 deficiency', 6900),
 ('Hereditary glaucoma', 6905),
 ('benign reproductive system neoplasm', 6908),
 ('genetic multiple congenital anomalies/dysmorphic syndrome-variable intellectual disability syndrome',
  6908),
 ('head and neck squamous cell carcinoma', 6910),
 ('abnormality of the gastrointestinal tract', 6916),
 ('Cardiodysrhythmic potassium-sensitive periodic paralysis', 6917),
 ('Congenital muscular dystrophy with cerebellar involvement', 6921),
 ('Porphyria', 6921),
 ('body fat percentage', 6926),
 ('Abnormal emotion/affect behavior', 6929),
 ('Galloway-Mowat syndrome', 6930),
 ('mental process', 6931),
 ('Distal renal tubular acidosis', 6934),
 ('Congenital muscular dystrophy due to dystroglycanopathy', 6941),
 ('Nuclear cataract', 6941),
 ('body weight', 6949),
 ('chronic interstitial cystitis', 6955),
 ('bile duct cancer', 6956),
 ('Mastodynia', 6959),
 ('bile duct carcinoma', 6962),
 ('body composition measurement', 6962),
 ('metabolic disease with epilepsy', 6962),
 ('prostate carcinoma', 6963),
 ('glomerular filtration rate', 6964),
 ('cranial nerve palsy', 6965),
 ('alcohol-induced mental disorder', 6966),
 ('Congenital dyserythropoietic anemia type IV', 6971),
 ('Disorder of protein O-glycosylation', 6972),
 ('Familial steroid-resistant nephrotic syndrome with sensorineural deafness',
  6973),
 ('MORM syndrome', 6975),
 ('gastric adenocarcinoma', 6977),
 ('Disorder of mitochondrial fatty acid oxidation', 6978),
 ('Rare genetic refraction anomaly', 6979),
 ('neoplastic syndrome', 6991),
 ('Autosomal recessive hyperinsulinism due to SUR1 deficiency', 6993),
 ('eyelid disease', 6993),
 ('eye adnexa disease', 6995),
 ('Osteogenesis imperfecta', 6998),
 ('Disorder of biogenic amine metabolism and transport', 7001),
 ('embryonal tumor of neuroepithelial tissue', 7001),
 ('thoracic disease', 7008),
 ('bone remodeling disease', 7013),
 ('Beta-propeller protein-associated neurodegeneration', 7016),
 ('prostate disease', 7016),
 ('malignant colon neoplasm', 7019),
 ('myeloid white cell count', 7019),
 ('hyperplasia', 7023),
 ('Central congenital hypothyroidism', 7024),
 ('lean body mass', 7026),
 ('alcohol dependence', 7028),
 ('cecal disease', 7028),
 ('Disorder of neurotransmitter metabolism and transport', 7030),
 ('encephalomyelitis', 7030),
 ('eye measurement', 7034),
 ('Distal spinal muscular atrophy', 7042),
 ('Rare hyperopia and astigmatism', 7043),
 ('male infertility', 7050),
 ('skin neoplasm', 7054),
 ('gallbladder disease', 7055),
 ('Familial isolated hyperparathyroidism', 7058),
 ('mathematical ability', 7058),
 ('prostate cancer', 7061),
 ('prostate neoplasm', 7063),
 ('Constitutional neutropenia with extra-haematopoietic manifestations', 7067),
 ('COASY protein-associated neurodegeneration', 7069),
 ('Autosomal recessive limb-girdle muscular dystrophy type 2O', 7072),
 ('Unclassified genetic skin disorder', 7073),
 ('glaucoma', 7076),
 ('Paget disease', 7077),
 ('thoracic cancer', 7081),
 ('abnormality of body weight', 7082),
 ('Spinocerebellar ataxia type 2', 7083),
 ('Sickle cell anemia', 7088),
 ('Sickle cell disease and related diseases', 7092),
 ('germ cell and embryonal cancer', 7093),
 ('benign female reproductive system neoplasm', 7095),
 ('infectious disease of the nervous system', 7095),
 ('Flaviviridae infectious disease', 7096),
 ('Alopecia universalis', 7100),
 ('Genetic visceral malformation of the liver, biliary tract, pancreas or spleen',
  7102),
 ('gastroesophageal disease', 7107),
 ('Autosomal recessive sideroblastic anemia', 7111),
 ('HIV-1 infection', 7113),
 ...]
In [188]:
plt.figure(figsize=(15,15))

    
# draw base etwork 
nx.draw_networkx(Graph, pos = layout, node_size = 30, with_labels = False, node_color = 'grey', edge_color = '0.7')

# add bigger nodes in color
top_nodes = [node[0] for node in total_ranking_g][:10]
labels = {i: i for i in top_nodes}
colors = ['#1ECD3F', '#0C437E', '#30F77B', '#E0016D', '#E0C6F4', '#A402CF', '#6C5020', '#014F05', '#3636DE', '#0E82F6']
big_size = 3000
sizes = [big_size/(i+1) for i in range(n)]

nx.draw_networkx_nodes(Graph,pos=layout,nodelist= top_nodes, with_labels = True, node_color=colors, node_size = sizes)

#nx.draw_networkx_labels(G,layout, labels,font_size=16, font_color = 'k') 


plt.title('Overall ranking_g', fontsize = 20)
plt.savefig('overall_nx.png')
plt.axis("off")
plt.show()

5. Disease to disease Network

In [61]:
raw_data_predicate = pd.read_csv('COVID_KG_sample.csv')
raw_data_predicate.head()
Out[61]:
subject predicate object
0 00340eea543336d54adda18236424de6a5e91c9d isAboutDisease EFO_0000544
1 00340eea543336d54adda18236424de6a5e91c9d isAboutDisease EFO_0003106
2 00340eea543336d54adda18236424de6a5e91c9d isAboutDisease EFO_0009727
3 00340eea543336d54adda18236424de6a5e91c9d isAboutDisease HP_0001945
4 00340eea543336d54adda18236424de6a5e91c9d isAboutDisease HP_0012735
In [62]:
predicate = raw_data_predicate.predicate
predicate.value_counts()
Out[62]:
isAssociatedTo              176088
hasGeneticClue              103103
belongsToTherapeuticArea     22142
isASpecific                  17548
isAboutDisease                5986
isAboutTarget                 2967
Name: predicate, dtype: int64
In [63]:
predicate_type = np.unique( list(predicate))

Disease and genes informations

In [64]:
data_full = {}

for pred in predicate_type:
    sub_df = raw_data_predicate[raw_data.predicate == pred]
    node_pairs = []
    for i in range(len(sub_df)):
        pair_edge = sub_df.iloc[i]
        node_pairs.append((pair_edge['subject'],pair_edge['object']))
    data_full[pred] = node_pairs
In [65]:
# edge list between diseases
dis_to_dis = [(diseases_names[link[0]],diseases_names[link[1]]) for link in data_full['hasGeneticClue']]

Graph duplication and update

In [66]:
Graph2 = Graph.copy()
In [67]:
Graph2.add_edges_from(dis_to_dis)
In [68]:
#layout2 = nx.spring_layout(Graph2)
In [69]:
# Save
#np.save('layout2.npy', layout) 

# Load
layout_dd = np.load('layout2.npy',allow_pickle='TRUE').item()
In [70]:
print('Number of nodes: {}'.format(len(Graph2.nodes)))
print('Number of edges: {}'.format(len(Graph2.edges)))
Number of nodes: 7316
Number of edges: 123644
In [157]:
# Taking the previous overall score (total_ranking), create subrgaphs around each central node defined
def top_subgraphs(G, total_ranking, n):
    
    sub_graphs = {}
    
    top_diseases = [d[0] for d in total_ranking][:n]
    
    edge_list = list(G.edges)
    
    for d in top_diseases:
        
        sub_graphs[d] = {}
        
         
        sub_edglist = [link for link in edge_list if link[0] == d]
        
        sub_graphs[d]['edgelist'] = sub_edglist
        
        sub_graphs[d]['node_list'] = [link[1] for link in sub_edglist if link[0] == d]
        
        sub_graphs[d]['gene_nodes'] = [link[1] for link in sub_edglist if Graph2.nodes[link[1]]['label'] == 'Gene']
        
        sub_graphs[d]['disease_nodes'] = [link[1] for link in sub_edglist if Graph2.nodes[link[1]]['label'] == 'Disease']
        
        sub_graphs[d]['Total Nodes'] = len(sub_graphs[d]['node_list'])
        
        sub_graphs[d]['Total Genes'] = len(sub_graphs[d]['gene_nodes'])
        
        sub_graphs[d]['Total Diseases'] = len(sub_graphs[d]['disease_nodes'])
        
        extended = sub_graphs[d]['node_list'].append(d) 
    
        new_G = nx.Graph()
        
        #new_G.add_nodes_from(sub_graphs[d]['node_list'])
        
        new_G.add_edges_from(sub_graphs[d]['edgelist'])
        
        sub_graphs[d]['layout'] = nx.spring_layout(new_G)
        
        sub_graphs[d]['sub_graph'] = new_G
        
        
    return sub_graphs   
In [173]:
sub_graphs_top = top_subgraphs(Graph2, total_ranking_d, 5)
In [162]:
results = {}
for d in sub_graphs_top:
    results[d] = {}
    results[d]['Total Nodes'] = sub_graphs_top[d]['Total Nodes']
    results[d]['Total Genes'] = sub_graphs_top[d]['Total Genes']
    results[d]['Total Diseases'] = sub_graphs_top[d]['Total Diseases']
In [164]:
pd.DataFrame.from_dict(results).to_csv('top_nodes_comparison.csv')
In [174]:
sub_graph_covid = top_subgraphs(Graph2, covid_score_list, 5)
In [175]:
# Draw each central disease with its own network
def draw_disease_top(sub_graphs_top, name, classification):
    
    plt.figure(figsize=(16,10))
    
    G = sub_graphs_top['sub_graph']
    
    layout = sub_graphs_top['layout']
    
    # draw base etwork 
    nx.draw_networkx(G, pos = layout, node_size = 30, with_labels = False, node_color = 'grey', edge_color = '0.7')
    
    # add bigger nodes in color
    covid = 'COVID'
    labels_minor = {i: i for i in sub_graphs_top['node_list']}
    label_name = {name: name.upper()}
    label_covid = {covid: covid}
        
    diseases_nodes = sub_graphs_top['disease_nodes']
    
    genes_nodes = sub_graphs_top['gene_nodes']
    
    try:
        del labels_minor[name]
    except:
        print('ok')
    
    nx.draw_networkx_nodes(G,pos=layout,nodelist= genes_nodes, with_labels = True, node_color='green', node_size = 700)

    nx.draw_networkx_nodes(G,pos=layout,nodelist= diseases_nodes, with_labels = True, node_color='red', node_size = 1000)

    nx.draw_networkx_nodes(G,pos=layout,nodelist= [name], with_labels = True, node_color='yellow', node_size = 5000)

    nx.draw_networkx_nodes(G,pos=layout,nodelist= [covid], with_labels = True, node_color='blue', node_size = 2200)

    nx.draw_networkx_labels(G,layout, labels_minor, font_size=12, font_color = 'k') 
    nx.draw_networkx_labels(G,layout, label_name, font_size=15, font_color = 'k')
    nx.draw_networkx_labels(G,layout, label_covid, font_size=15, font_color = 'white')
    
    
    plt.title(name.upper(), fontsize = 20)
    plt.savefig(classification + 'final' + str(name)+'.png')
    plt.axis("off")
    plt.show()
In [176]:
for d in sub_graphs_top:
    draw_disease_top(sub_graphs_top[d], d, 'OVERALL')
In [177]:
for d in sub_graph_covid:
    draw_disease_top(sub_graph_covid[d], d, 'COVID')

6. Clustering

In [76]:
# group diseases in which category of diseases they belong to
generic = {}
for link in data_full['isASpecific']:
    try:
        generic[diseases_names[link[1]]].append(diseases_names[link[0]])
    except:
        generic[diseases_names[link[1]]] = []
        generic[diseases_names[link[1]]].append(diseases_names[link[0]])
    
In [77]:
generic_names = list(generic.keys())
In [79]:
# rank them according to size
generic_sizes = [(gen, len(generic[gen])) for gen in generic]
generic_sizes_dict = {gen: len(generic[gen]) for gen in generic}
In [80]:
# rank them according to link to covid
generic_covid_score = []
generic_covid_score_dict = {}

for gen in generic:
    belong = generic[gen]
    scores = []
    for n in belong:
        try:
            scores.append(covid_score[n])
        except KeyError:
            continue
    generic_covid_score.append((gen, np.sum(scores)))
    generic_covid_score_dict[gen] = np.sum(scores)
In [83]:
#sort
generic_sizes.sort(key=custom_sort, reverse = True)
generic_covid_score.sort(key=custom_sort, reverse = True)
In [84]:
# draw the clusters (categories of disease) around the covid on different paramters
def draw_covid_generic(generic_score, n, name, edge_factor, node_factor):
    nodes_and_score = generic_score[:n]
    nodes = [nodes_and_score[i][0] for i in range(len(nodes_and_score))]
    
    w_edges = []
    simple_edges = []
    for node in nodes_and_score:
        w_edges.append(('COVID', node[0], node[1]))
        simple_edges.append(('COVID', node[0]))
    G = nx.Graph()
    
    G.add_weighted_edges_from(w_edges)
    
    layout = nx.spring_layout(G)
    plt.figure(figsize=(16,10))
    # draw base etwork 
    nx.draw_networkx(G, pos = layout, node_size = 30, with_labels = False, node_color = 'grey', edge_color = '0.7')
    
    # add bigger nodes in color
    covid = 'COVID'
    labels_minor = {nodes[i]: nodes[i] for i in range(len(nodes))}
    label_covid = {covid: covid}
    
    edge_size = [nodes_and_score[i][1]*edge_factor for i in range(len(nodes_and_score))]
    
    node_size = [nodes_and_score[i][1]*node_factor for i in range(len(nodes_and_score))]
    
    nx.draw_networkx_edges(G, pos = layout, edgelist = simple_edges, width = edge_size, edge_color = 'grey')
    
    nx.draw_networkx_nodes(G,pos=layout,nodelist= nodes,node_size =  node_size, with_labels = True, node_color='red')

    nx.draw_networkx_nodes(G,pos=layout,nodelist= [covid], with_labels = True, node_color='blue', node_size = 3000)

    nx.draw_networkx_labels(G,layout, labels_minor, font_size=12, font_color = 'k') 
    
    nx.draw_networkx_labels(G,layout, label_covid, font_size=15, font_color = 'white')
    
    
    plt.title(name.upper(), fontsize = 20)
    plt.savefig('final' + str(name)+'.png')
    plt.axis("off")
    plt.show()
        
In [190]:
draw_covid_generic(generic_covid_score, 10, 'COVID SCORE', edge_factor = 20, node_factor = 1e4)
In [189]:
draw_covid_generic(generic_sizes, 10, 'SIZE', edge_factor = 0.05, node_factor = 10)

Let's norm the influence to the size

In [88]:
normed_generic = [(gen, float(generic_covid_score_dict[gen]/generic_sizes_dict[gen])) for gen in generic_sizes_dict if generic_sizes_dict[gen] !=0]
In [89]:
normed_generic.sort(key=custom_sort, reverse = True)
In [90]:
draw_covid_generic(normed_generic, 30, 'NORMED', edge_factor = 3e2, node_factor = 5e4)
In [ ]: